import requests,fake_useragent
from bs4 import BeautifulSoup as bs
import xlsxwriter
import time,random,sys
# Impersonate a Chrome browser so the server does not reject the scrape.
useragent = fake_useragent.UserAgent().chrome
header = {'User-Agent': useragent}

# ChEBI ontology listing page for entity CHEBI:22587.
url = 'https://www.ebi.ac.uk/chebi/chebiOntology.do?chebiId=CHEBI:22587'
# timeout added: without it requests.get can block forever on a stalled server.
response = requests.get(url=url, headers=header, timeout=30)

# Keep a local copy of the raw HTML for debugging / later inspection.
with open('ChEBI.html', 'wb') as f:
    f.write(response.content)

# Parse the response bytes directly instead of re-reading the file in text
# mode: the original round-trip depended on the platform default encoding
# and could raise UnicodeDecodeError (or corrupt text) on some systems.
soup = bs(response.content, 'lxml')

# One 'bigGallery' div per entity on the page.
results = soup.find_all('div', class_='bigGallery')
total = len(results)
# Output workbook with a single worksheet that receives every row.
out = xlsxwriter.Workbook('ChEBI.xlsx')
sheet = out.add_worksheet()

# Shared cell format: thin border, centred horizontally and vertically.
# NOTE(review): the name shadows the built-in format(); kept unchanged
# because the scraping loop below references it by this name.
format = out.add_format(
    {'bold': False, 'border': 1, 'align': 'center', 'valign': 'vcenter'}
)

# Column headers, written as the first row of the sheet.
tabhead = ['ID', 'Name', 'Mass', 'Formula', 'Image', 'SMILE', 'Details']
sheet.write_row(0, 0, tabhead, cell_format=format)
# Progress indicator; flush so the carriage-return updates appear immediately
# (stdout is block-buffered when redirected, so '\r' alone may never show).
sys.stdout.write('0/' + str(total) + '\r')
sys.stdout.flush()

for i in range(total):
    infos = results[i].text.split('\n')
    name = infos[1]
    # Entries 9 and 129 apparently lack the mass/formula lines, so the fixed
    # offsets below would grab the wrong text for them.
    # TODO(review): detect missing fields from the markup instead of
    # hard-coding these two indices.
    if i == 9 or i == 129:
        mass, formula = '', ''
    else:
        mass = infos[2][6:]         # drop a 6-char prefix — presumably a 'Mass: '-style label; verify
        formula = infos[7].strip()

    # Extract the CHEBI id from the entity's anchor tag.
    # NOTE(review): slicing the stringified tag at fixed offset 80 is fragile
    # and breaks if the markup changes; kept byte-identical for now.
    src = str(results[i].find('a'))
    sign = src.index('>') - 1
    ID = src[80:sign]

    image = ('https://www.ebi.ac.uk/chebi/displayImage.do'
             '?defaultImage=true&imageIndex=0&chebiId=' + str(ID))
    link = 'https://www.ebi.ac.uk/chebi/searchId.do?chebiId=' + str(ID)

    try:
        # Fetch the detail page; timeout prevents a stalled request from
        # hanging the whole run.
        response = requests.get(url=link, headers=header, timeout=30)
        # Polite randomized delay (3–6 s) between detail-page requests.
        gap = random.randint(3, 5) + random.random()
        time.sleep(gap)
        soup = bs(response.content, 'lxml')
        # 'string=' replaces the deprecated 'text=' keyword in BeautifulSoup.
        smile = soup.find('td', string='SMILES').find_next_sibling().text
    except Exception:
        # Best effort: record the failure and continue with an empty SMILES.
        smile = ''
        with open('ErrorLog.txt', 'a') as f:
            # Bug fix: the original wrote ID + the literal word 'link' with no
            # newline, so entries ran together and the URL was never logged.
            f.write(ID + ' ' + link + '\n')

    data = [ID, name, mass, formula, image, smile, link]
    sheet.write_row(i + 1, 0, data, cell_format=format)
    sys.stdout.write(str(i + 1) + '/' + str(total) + '\r')
    sys.stdout.flush()

out.close()
print('Fetch Finish.')
